#general
require(purrr)
require(tidyverse)
require(data.table)
require(lubridate)
require(stringr)
require(ggvis)
require(ggplot2)
require(forcats)
require(ggmap)
require(highcharter)
require(broom)
require(plotly)
require(stringi)
#network plot
require(igraph)
require(ggmap)
require(sna)
require(intergraph)
require(ggnetwork)
require('visNetwork')
require(viridis)
# achieve/appendices
require(GGally)
require(networkD3)
Entities <- as.data.table(read.csv(file="../input/Entities.csv",na.strings=c("","NA"),stringsAsFactors = FALSE))
Addresses <- as.data.table(read.csv(file="../input/Addresses.csv",na.strings=c("","NA"),stringsAsFactors = FALSE))
Intermediaries <- as.data.table(read.csv(file="../input/Intermediaries.csv",na.strings=c("","NA"),stringsAsFactors = FALSE))
Officers <- as.data.table(read.csv(file="../input/Officers.csv",na.strings=c("","NA"),stringsAsFactors = FALSE))
Edges <- as.data.table(read.csv(file="../input/all_edges.csv",na.strings=c("","NA"), stringsAsFactors = FALSE))
to minimise the number of nodes and edges needed to plot
## Nodes
# Combining various identities and label them
Nodes<-rbind(Entities[,.(node_id,countries, country_codes, "Entities")],
Intermediaries[,.(node_id,countries, country_codes, "Intermediaries")],
Officers[,.(node_id,countries, country_codes, "Officers")])
colnames(Nodes)[4]<- "Identity"
some of the countries names are filled with multiple countries. such as “British Virgin Islands;Hong Kong”,
Nodes<-Nodes[is.na(countries), ':='(countries= "Unknown", country_codes = "XXX")]
## Records listed for single Country
IndividualCountry_Nodes<-Nodes[!grep(";",countries)] %>% # for single country listing
# creating id column that is unique to per country
.[,id:=.GRP, by= countries]
# Creating a unique Mapping of Country to ID
Country2ID_Map<-IndividualCountry_Nodes[,.(id,countries)]%>%
unique(., by = c("countries","id"))
#Number of countries
IndividualCountry.Agg<-Nodes[!grep(";",countries),] %>%
.[,.N,by=c("countries", "country_codes", "Identity")] %>%
.[order(-N)] %>%
.[, if(sum(N)> 5000) .SD, by=c("countries")] # filtering for only countries with more than 5k listings
# plot
hchart(IndividualCountry.Agg, "column", hcaes(x = countries, y = N, group = Identity))
# For records listing multiple countries, most of them are Entities.
data.frame(table(IndividualCountry_Nodes$Identity))
## CrossCountry Nodes, which listed multiple countries seperated with ";",
CrossCountry_Nodes<-Nodes[grep(";",countries)]
#Number of countries
Nodes[grep(";",countries)] %>%
.[,.N,by=c("countries", "country_codes", "Identity")] %>%
.[order(-N)]
# For records listing multiple countries, most of them are Entities.
data.frame(table(CrossCountry_Nodes$Identity))
# At first I thought about ignoring these, but then, these might hold valueble information regarding links, given the links between one entities/intermediates and another.
# CrossCountry_Nodes
## "British Virgin Islands;Hong Kong" is listed as seperated count as
# "Hong Kong;British Virgin Islands", Hence, need to combine them in same counts
## helper function for vapply()
striHelper <- function(x) stri_c(x[stri_order(x)], collapse = ";")
CrossCountry_Nodes$countries<-vapply(strsplit(CrossCountry_Nodes$countries, ";"), striHelper, ";")
CrossCountry_Nodes$country_codes<-vapply(strsplit(CrossCountry_Nodes$country_codes, ";"), striHelper, ";")
# Raw Number Aggregation
CrossCountryOccurance<-CrossCountry_Nodes %>%
.[,.N, by = c("countries", "country_codes")] %>%
.[order(-N)]
#Split to differentiate between countries
t.splits <- max(lengths(strsplit(CrossCountry_Nodes[,countries], ";")))
t.test <- CrossCountry_Nodes[,.(countries,country_codes)] %>%
.[, paste0("m.countries",1:t.splits):=tstrsplit(countries,";")] %>%
melt(., measure.vars = patterns("^m.*"), na.rm = T) %>%
.[,.N, by=c("value","countries")] %>%
.[order(-N)] %>%
.[, if(sum(N)> 500) .SD, by=c("countries")] # filtering for only countries with more than 500 listings
# plot
hchart(t.test, "column", hcaes(x = value, y = N, group = countries))
CrossCountry_Nodes<-CrossCountry_Nodes%>%
.[, paste0("m.countries",1:t.splits):=tstrsplit(countries,";")] %>%
# this would merge the country uniqiue id on "m.countries1" column, hence introduce slight bias into the data
# Perhaps a double merge approach might be better? such that both of the listed countries are each melted into a entry
# It would be messy though.
.[Country2ID_Map, on=c(m.countries1 = "countries"), nomatch= 0]
## And Thus we finnaly have our node_id to country id ready
Bind_Country2ID_Map<-rbindlist(
list(
CrossCountry_Nodes[,.(node_id, m.countries1, Identity, id)],
IndividualCountry_Nodes[,.(node_id, countries, Identity, id)]
)
)
##Edges
Edges_simplified<-Edges[,.(node_1, node_2)]
# Edges_simplified[complete.cases(Edges_simplified)]
Merging countries id with nodes id, simplifying the relationship
#merging data table, edges and nodes
Country_id_Edges<-Edges_simplified %>%
.[Bind_Country2ID_Map, on=c(node_1 = "node_id"), nomatch= 0] %>%
.[Bind_Country2ID_Map, on=c(node_2 = "node_id"), nomatch= 0] %>%
.[,.(id,i.id)]%>% #the "ID" is derived from country ID from node_1, the second - "I.ID" is derived from node_2
.[, .N, by=c("id","i.id")]
colnames(Country_id_Edges)<- c("from", "to", "weight")
# with ggmap version 2.6 and geocoding withing a key, it is possible for one to ran into OVER QUERY LIMIT with just a couple geocode ( as the quote is shared).
# Hence, to get it working perfectly, currenctly, one has to install ggmap v2.7 ( through github only atm), and register a google key
# devtools::install_github("dkahle/ggmap")
# install.packages("geosphere")
## To get a API key from google API
# https://developers.google.com/maps/documentation/geocoding/get-api-key
# https://stackoverflow.com/questions/36175529/getting-over-query-limit-after-one-request-with-geocode
register_google(key = "AIzaSyChW6mLIfjq1NlCd1nxg_A6z1jgtTdVmek")
filelist <- list.files("../input")
if(any(filelist=="geocodes_df.rds")){
#read the created .rds containing the require data
geocodes_df <- readRDS("../input/geocodes_df.rds")
}else{
# using geocodes ( part of ggmap package) to find the lat and lon
# perhaps not the cleanest way, some of the location will not be the most accurate.
geocodes_df <- geocode(Country2ID_Map$countries)
saveRDS(geocodes_df, "../input/geocodes_df.rds")
}
CountryIDNodes<-cbind(Country2ID_Map,geocodes_df)
Given the number of Edges, and that we are probably more interested in links that are most significant, perhaps the edgeshould be filtered by weight before plotting into network graph
# summary(Country_id_Edges$weight)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 1.0 2.0 5.0 283.9 21.0 173200.0
http://minimaxir.com/2016/12/interactive-network/
This threshold the nodes/vertices and edges to include only those that is connected by
net <- graph.data.frame(Country_id_Edges[weight>=285, ],
CountryIDNodes[id %in%
sort(unique(
c(
Country_id_Edges[weight>=285]$from,
Country_id_Edges[weight>=285]$to)
))],
directed = TRUE)
#igraph, creating the graph entities while filtering for weight
Nodes_betweenness<- igraph::betweenness(net)
#### Nodes Enchancement
V(net)$degree <- igraph::degree(net, mode = "all")
V(net)$betweenness <-log(10+Nodes_betweenness)/log(1+max(Nodes_betweenness))
V(net)$centrality <- eigen_centrality(net, weights=E(net)$Weight)$vector
V(net)$community <- colorize(V(net)$community)
V(net)$text <- V(net)$countries
#### Edge Enhancement
#Need to manually alocate the Edge lat,lon to appropriate coordinates
end_loc <- data.table(ename=as.integer(get.edgelist(net)[,2])) %>%
.[CountryIDNodes, on= c(ename="id"), nomatch= 0]
### Setting coordinates of edges arrow
E(net)$endlat <- end_loc$lat
E(net)$endlon <- end_loc$lon
### Scaling of weight
# applying a logarithm scale to recale the weight from 0 to 1
E(net)$weight<-log(1+E(net)$weight)/log(1+max(E(net)$weight))
Forcing the nodes to be located at respective coordiantes(longitude and latitude) of the said country.
# world <- map_data("world")
# world <- world[world$region != "Antarctica",] # intercourse antarctica
df_net <- ggnetwork(net, layout = "kamadakawai", weights="weight", niter=50000)
plot <- ggplot(arrow.gap = 0.025) +
borders("world",
colour ="black", fill="#7f7f7f", size=0.10, alpha=1/2)+
geom_edges(data = df_net,aes(x = lon, y = lat, xend = endlon, yend = endlat),
size=0.4, alpha=0.25 ,
arrow = arrow(length = unit(10, "pt"), type = "closed")) +
geom_nodes(data=df_net,aes(x=lon, y=lat, xend=endlon,yend=endlat,
size=centrality, colour=sqrt(degree), text=text)) +
scale_colour_viridis() +
ggtitle("Relationship of Countries with various nodes") +
## geom_map would provide a nicer map, but proved to be problematic when chaining through ggplotly
# geom_map(data=world, map=world, aes(x=long, y=lat, map_id=region),
# color="white", fill="#7f7f7f", size=0.05, alpha=1/4) +
guides(size=FALSE, color=FALSE) +
theme_blank()+
# https://github.com/ropensci/plotly/issues/842
theme(legend.position='none') #translate to hide legend in plotly
## Warning: package 'maps' was built under R version 3.3.3
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
## Warning: Ignoring unknown aesthetics: xend, yend, text
#raw plot
plot
#plotlly plot
plot %>% ggplotly(tooltip="text") %>% toWebGL()
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: 'scattergl' objects don't have these attributes: 'hoveron'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'mode', 'line', 'marker', 'connectgaps', 'fill', 'fillcolor', 'error_y', 'error_x', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'
## Warning: 'scattergl' objects don't have these attributes: 'hoveron'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'mode', 'line', 'marker', 'connectgaps', 'fill', 'fillcolor', 'error_y', 'error_x', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'
## Warning: 'scattergl' objects don't have these attributes: 'hoveron'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'mode', 'line', 'marker', 'connectgaps', 'fill', 'fillcolor', 'error_y', 'error_x', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'
#issue, arrow head doesn't get translated into plotly via ggplotly
#Doesn't appear to be very stable
However, given the number of nodes and edges laying the map, going this approach demand sacrifice on the visibility, perhaps a more stricter thresholding will help?
toWebGL() Doesn’t appear to be very stable at times, may require the user to click on the plot once to start rendering
These exist a host of layouts that exists to help illustrate the connectivity between nodes and edges, not to mention, the coordinates (x, y) of the nodes in the network plot could carry significant meaning as well.
df_net <- ggnetwork(net, layout = "fruchtermanreingold", weights="weight", niter=50000, arrow.gap=0)
# layout = "kamadakawai"
# arrow.gap = 0.025 #
# arrow gap default value for directed graph, but the arrows aren't carried over in plottly
# niter - This argument controls the number of iterations to be employed. Larger values take longer, but will provide a more refined layout. (Defaults to 500.)
plot <- ggplot() +
geom_edges(data = df_net,aes(x = x, y = y, xend = xend, yend = yend),
size=0.4, alpha=0.25) +
geom_nodes(data = df_net,aes(x = x, y = y, xend = xend, yend = yend,
size = degree, color = degree, text=text)) +
ggtitle("Relationship of Countries with various nodes") +
scale_colour_viridis() +
## geom_map would provide a nicer map, but proved to be problematic when chaining through ggplotly
# geom_map(data=world, map=world, aes(x=long, y=lat, map_id=region),
# color="white", fill="#7f7f7f", size=0.05, alpha=1/4) +
# scale_color_manual(labels=c("EWR", "JFK", "LGA", "Others"),
# values=c(colors, "#1a1a1a"), name="Airports") +
guides(size=FALSE, color=FALSE) +
theme_blank()+
# https://github.com/ropensci/plotly/issues/842
theme(legend.position='none') #translate to hide legend in plotly
## Warning: Ignoring unknown aesthetics: xend, yend, text
#raw plot
plot
#plotlly plot
plot %>% ggplotly(tooltip="text")
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
http://kateto.net/networks-r-igraph * Well documented on their Website * Physics based * Appears to be running on Javascript/html * Aethestically pleasing, among the cleanest network plot
Issues * No direct method to overlay the nodes onto of a map, While it is possible to force them into respective coordintes(x, y) and disable Physics, to properly display them would require some work on scaling.
vis_edge<-Country_id_Edges[weight>=285,]
vis_node<-CountryIDNodes[id %in% sort(unique(
c(
Country_id_Edges[weight>=285]$from,
Country_id_Edges[weight>=285]$to)
))]
# using igraph to calculate some betweenness and degree
net<-graph.data.frame(vis_edge, vis_node, directed = TRUE)
Nodes_betweenness<-igraph::betweenness(net) # Node size
Nodes_Degree<-igraph::degree(net, mode = "all")
## Enchancement
## ?visNodes
vis_node$shape <- "dot"
vis_node$shadow <- TRUE # Nodes will drop shadow
vis_node$label <-vis_node$countries
vis_node$title <- vis_node$countries
vis_node$size <- log(10+Nodes_betweenness)/log(1+max(Nodes_betweenness))* 25 #default to 25
vis_node$borderWidth <- 2 # Node border width
vis_node$color.background <- colorize(Nodes_Degree)
vis_node$color.border <- "black"
vis_node$color.highlight.background <- "orange"
vis_node$color.highlight.border <- "darkred"
## Defining starting position of nodes as coordinates of the countries, so that their location of on graph would bear some semblance to their respective location on the map ( ie, Australia is down south etc)
vis_node$x<- vis_node$lon+180
vis_node$y<- -vis_node$lat+90
## Physics can be disable so the nodes would not be moved from the initial location (lat/lon), this is not used as it generated a plot that is rather hard to read.
# vis_node$physics<- F
# vis_edge$physics<- T
# ?visEdges
vis_edge$shadow <- FALSE # edge shadow
vis_edge$width <-log(1+vis_edge$weight)/log(1+max(vis_edge$weight)) # default to 1
vis_edge$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
set.seed(1)
visNetwork(edges=vis_edge, nodes=vis_node, main="Aggregated Network plot of Countries",
height="400px", width="100%") %>%
visOptions(highlightNearest = TRUE)
## While the Initial zoom level can be setup, this require either to disable visPhysics's Stabilization or the use of visIgraphLayout, which would sacrifice the the cleanliness of the plot
## Choosing to true off stabilization option in physics would hence require the stabilization iteration to be plotted, aesthetically and physically impressive but not useful
# visEvents(type = "once", startStabilizing = "function() {
# this.moveTo({scale:0.5})}") %>%
# visPhysics(stabilization = FALSE)%>%
# %>% visIgraphLayout()
## While it yield a ok map with the Igraph Layout, it is relatively messy as the nodes and edges can be in close proximity with one another.
You will have to scroll your mouse3 to zoom towards the network plots, unfortunately setting initiall zoom level brought about some undesirable side effects, at least for the methods i tried.
Country coordinates(lat, lon) of respective nodes are used as starting location of the network plot. Hence, the final location of nodes ( countries) should bear some resemblance to their respective location on the world map
githubpage/document This network plot methods works pretty well though, asside from requiring edges to be index at 0. This is further complicated by the fact that thresholded nodes_id aren’t even continuous.
vis_edge<-vis_edge[order(from, to)]
el <- data.frame(from=vis_edge$from,
to=vis_edge$to,
value = vis_edge$width)
# http://www.r-graph-gallery.com/253-custom-network-chart-networkd3/
## Suggested method of reindexing the id, probably only works if your id is continously
# vis_node$id=as.numeric(as.factor(vis_node$id))-1
## Reindexing the nodes as d3 network/javascript are zero index
#Create a zero index column IDN
vis_node$IDN=as.numeric(factor(vis_node$id))-1
# Merged/Mapped the IDN column into "to" and "from" column in edges.
vis_edge_d3<-vis_node[,.(id,IDN)][vis_edge, on = c(id= "from")] %>%
vis_node[,.(id,IDN)][., on = c(id= "to")]
# Dropping unnecessary columns and renaming
vis_edge_d3$id<-NULL
vis_edge_d3$i.id<-NULL
colnames(vis_edge_d3)[1]<- "from"
colnames(vis_edge_d3)[2]<- "to"
# forceNetwork(Links = vis_edge_d3, Nodes = vis_node,
# # plotting parameters
# Source="from", Target="to", Value = "width",
# Group = "color.background", NodeID="countries",
# # Nodesize=6,
# opacity = 0.8,
# opacityNoHover = 0.4,
# radiusCalculation = JS(" d.nodesize^2+10"),
# linkColour = "#afafaf",
# linkWidth = JS("function(d) { return Math.sqrt(d.value); }"),
#
# # layout
# charge = -250, # if highly negative, more space betqeen nodes
#
# # general parameters
# arrows=TRUE,
# fontSize=17,
# zoom = TRUE,
# legend=F,
# width = NULL,
# height = NULL
# )
## Nodes
# Combining various identities and label them
Nodes<-rbind(
Entities[,.(node_id,countries, country_codes, nameID=name, sourceID, Identity="Entities")],
Intermediaries[,.(node_id,countries, country_codes, nameID=name, sourceID, Identity="Intermediaries")],
Officers[,.(node_id,countries, country_codes, nameID=name, sourceID, Identity="Officers")],
Addresses[,.(node_id, countries, country_codes, sourceID, Identity="Addresses")]
, fill=TRUE)
#I initially thought that address wouldn't be needed in to full network diagram, but later found out that if I exlude the addresses datasets, I couldn't form a network graph some of the nodes require connection to the node_id that can only be found in address datasets.
# These combined dataframe of nodes is not directly network graphable. As the node_id is not unique, ie. Below we explore these non unique node_id records.
Non_unique_ID <-Nodes[, fD := .N > 1, by = node_id][fD==TRUE] %>%
.[order(node_id)]
Non_unique_ID
# So, apparently some ID have entires for both Intermediaries and Officers, which probably a simply row_bind to combine them, as in these case m the node_id would not be unique.
# after some testing, it appears that such issue only occurs between intermediaries and officers.
# Dropping the officers row if the node_id is already occupied by an intermediate.
Nodes<-Nodes[!(fD==TRUE & Identity=="Officers")]
# Dropping the fD column as it is no longer needed.
Nodes$fD <- NULL
Nodes[,.N, by= sourceID]
# While I intend to use different arrows type for the disply of Edges witin the network plot, there are simply far too many relationship types as indicated by the rel_type column in Edges. Although the majority of the relationship are well covered by the top 30 types
# Hence, I will simplify it by defining 3 type of edges,
# 1) Identical relationship (only within top 30 types)#same name as
# 2) Directional relationship (only within top 30 types) #intermediary of/shareholder of/director of
# 3) Others (those not inlcuded in top 30 most popular relationship)
popular_rel_type<-Edges[,.N, by=rel_type] %>%
.[order(-N)] %>%
head(30)
# within the top 30 most common relationship
identical_relation_list <- c("similar name and address as",
"same name as",
"same company as",
"same name and registration date as",
"same address as")
Edges[rel_type %in% popular_rel_type$rel_type, Edge_Type:=1]%>%
.[!(rel_type %in% popular_rel_type$rel_type), Edge_Type:=2]%>%
.[rel_type %in%identical_relation_list, Edge_Type :=3]
##Edges
Edges_simplified<-Edges[,.(node_1, node_2, rel_type, Edge_Type, sourceID)]
colnames(Edges_simplified) <-c("from", "to", "rel_type", "edge_type", "sourceID")
## Setting network graph into directed to examine the all connections and out connections of nodes
net <- graph.data.frame(Edges_simplified, vertices=Nodes, directed = T)
### Degree, the connections of edges
nodes_degree_all <- igraph::degree(net, mode = "all")
nodes_degree_out <- igraph::degree(net, mode = "out")
# The degree of a vertex is its most basic structural property, the number of its adjacent edges.
### Betweenness, number of shortest path going through vertext,
### It doesn't seems sensible to examine the network plot with this
# nodes_betweenness<- igraph::betweenness(net)
## The vertex and edge betweenness are (roughly) defined by the number of geodesics (shortest paths) going through a vertex or an edge.
## Setting network graph into non directed to greatly simplify the cluster calculation
net <- graph.data.frame(Edges_simplified, vertices=Nodes, directed = F)
nodes_centrality <- eigen_centrality(net)
## Eigenvector centrality scores correspond to the values of the first eigenvector of the graph adjacency matrix; these scores may, in turn, be interpreted as arising from a reciprocal process in which the centrality of each actor is proportional to the sum of the centralities of those actors to whom he or she is connected.
## allocating the calculated nodes attributes into a dataframe
nodes_attributes<-data.table(names(nodes_degree_all),
unlist(nodes_degree_all),
unlist(nodes_degree_out),
unlist(nodes_centrality$vector))
colnames(nodes_attributes)<- c("nodes_id","nodes_degree_all","nodes_degree_out","centrality")
decomposed_graph_list<-decompose.graph(net)
# this return a list of seperate graph for each component
# plot(decomposed_graph_list[[231]])
##Calculation the number of members per decomposed graph and set it as a dataframe.
vcount_dt<-data.table(unlist(lapply(decomposed_graph_list,vcount)),keep.rownames=T)
vcount_dt$membership_id<-rownames(vcount_dt)
setnames(vcount_dt, "V1", "vcount")
vcount_dt[order(-vcount)]
## Choosing clusters of different size to plot
#large id=991, N=406
#medium id=185, N=166
#small id=5050, N=16
subnodes_large<-as_data_frame(decomposed_graph_list[[991]], what = c("vertices"))
subedges_large<-as_data_frame(decomposed_graph_list[[991]], what = c("edges"))
subnodes_medium<-as_data_frame(decomposed_graph_list[[185]], what = c("vertices"))
subedges_medium<-as_data_frame(decomposed_graph_list[[185]], what = c("edges"))
subnodes_small<-as_data_frame(decomposed_graph_list[[5050]], what = c("vertices"))
subedges_small<-as_data_frame(decomposed_graph_list[[5050]], what = c("edges"))
# using igraph to calculate some betweenness and degree
subnet_large<-graph.data.frame(subedges_large, subnodes_large, directed = TRUE)
Nodes_betweenness<-igraph::betweenness(subnet_large) # Node size
Nodes_Degree<-igraph::degree(subnet_large, mode = "all")
# Enchancement
# ?visNodes
subnodes_large$id<- subnodes_large$name
subnodes_large$shadow <- TRUE # Nodes will drop shadow
subnodes_large$size <- log(10+Nodes_betweenness)/log(1+max(Nodes_betweenness))* 25 #default to 25
subnodes_large$borderWidth <- 2 # Node border width
subnodes_large$color.background <- colorize(Nodes_Degree)
subnodes_large$color.border <- "black"
subnodes_large$color.highlight.background <- "orange"
subnodes_large$color.highlight.border <- "darkred"
subnodes_large$shape <- factor(subnodes_large$Identity,
levels=c("Entities","Intermediaries","Officers","Addresses"),
labels=c("dot","triangle","square","diamond"))
subnodes_large$label <-subnodes_large$nameID
subnodes_large$title <- paste0("<p>",subnodes_large$nameID,"<br>",subnodes_large$countries,"</p>")
# ?visEdges
subedges_large$shadow <- FALSE # edge shadow
subedges_large$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
subedges_large$dashes <- (subedges_large$edge_type==3)
subedges_large$label<- subedges_large$rel_type
set.seed(1)
visNetwork(edges=subedges_large, nodes=subnodes_large, main="Extracted Large Cluster",
height="400px", width="100%") %>%
visIgraphLayout() %>%
visOptions(highlightNearest = list(enabled=T, degree=1, hover=F))
# using igraph to calculate some betweenness and degree
subnet_medium<-graph.data.frame(subedges_medium, subnodes_medium, directed = TRUE)
Nodes_betweenness<-igraph::betweenness(subnet_medium) # Node size
Nodes_Degree<-igraph::degree(subnet_medium, mode = "all")
# Enchancement
# ?visNodes
subnodes_medium$id<- subnodes_medium$name
subnodes_medium$shadow <- TRUE # Nodes will drop shadow
subnodes_medium$size <- log(10+Nodes_betweenness)/log(1+max(Nodes_betweenness))* 25 #default to 25
subnodes_medium$borderWidth <- 2 # Node border width
subnodes_medium$color.background <- colorize(Nodes_Degree)
subnodes_medium$color.border <- "black"
subnodes_medium$color.highlight.background <- "orange"
subnodes_medium$color.highlight.border <- "darkred"
subnodes_medium$shape <- factor(subnodes_medium$Identity,
levels=c("Entities","Intermediaries","Officers","Addresses"),
labels=c("dot","triangle","square","diamond"))
subnodes_medium$label <-subnodes_medium$nameID
subnodes_medium$title <- paste0("<p>",subnodes_medium$nameID,"<br>",subnodes_medium$countries,"</p>")
# ?visEdges
subedges_medium$shadow <- FALSE # edge shadow
subedges_medium$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
subedges_medium$dashes <- (subedges_medium$edge_type==3)
subedges_medium$label<- subedges_medium$rel_type
set.seed(1)
visNetwork(edges=subedges_medium, nodes=subnodes_medium, main="Extracted Medium Cluster",
height="400px", width="100%") %>%
# visIgraphLayout() %>%
visOptions(highlightNearest = TRUE)
# using igraph to calculate some betweenness and degree
subnet_small<-graph.data.frame(subedges_small, subnodes_small, directed = TRUE)
Nodes_betweenness<-igraph::betweenness(subnet_small) # Node size
Nodes_Degree<-igraph::degree(subnet_small, mode = "all")
# Enchancement
# ?visNodes
subnodes_small$id<- subnodes_small$name
subnodes_small$shadow <- TRUE # Nodes will drop shadow
subnodes_small$label <-subnodes_small$countries
subnodes_small$title <- subnodes_small$nameID
subnodes_small$size <- log(10+Nodes_betweenness)/log(1+max(Nodes_betweenness))* 25 #default to 25
subnodes_small$borderWidth <- 2 # Node border width
subnodes_small$color.background <- colorize(Nodes_Degree)
subnodes_small$color.border <- "black"
subnodes_small$color.highlight.background <- "orange"
subnodes_small$color.highlight.border <- "darkred"
subnodes_small$shape <- factor(subnodes_small$Identity,
levels=c("Entities","Intermediaries","Officers","Addresses"),
labels=c("dot","triangle","square","diamond"))
subnodes_small$label <-subnodes_small$nameID
subnodes_small$title <- paste0("<p>",subnodes_small$nameID,"<br>",subnodes_small$countries,"</p>")
# ?visEdges
subedges_small$shadow <- FALSE # edge shadow
subedges_small$arrows <- "middle" # arrows: 'from', 'to', or 'middle'
subedges_small$dashes <- (subedges_small$edge_type==3)
subedges_small$label<- subedges_small$rel_type
set.seed(1)
visNetwork(edges=subedges_small, nodes=subnodes_small, main="Extracted Small Cluster",
height="400px", width="100%") %>%
# visIgraphLayout() %>%
visOptions(highlightNearest = TRUE)
# Exploring centrality
High_Centrality_Nodes<-nodes_attributes[centrality>=0.002681][order(-centrality)]%>%head(30)
High_Centrality_Nodes
Nodes[node_id %in% High_Centrality_Nodes$nodes_id]
#Exploring degrees
# nodes with most in connections
nodes_attributes[order(nodes_degree_out, -nodes_degree_all)]
# nodes with most outgoing connections
nodes_attributes[order(-nodes_degree_out, nodes_degree_all)]